In [54]:
import tensorflow as tf
from tensorflow.contrib.framework.python.framework import checkpoint_utils

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
import numpy as np

In [20]:
# GCS locations of the two TensorFlow checkpoints compared in this notebook.
# NOTE: despite the MODEL_DIR_* names, each value is a checkpoint prefix
# (".../model_dir/model.ckpt-<step>"), not a directory; it is passed as-is to
# checkpoint_utils.list_variables / load_variable below.
MODEL_DIR_OCCUPATION = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir/model.ckpt-100000'
MODEL_DIR_GENDER = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_warmstart_biosbias_glove/20190404_151521/model_dir/model.ckpt-191000'

Extract the final-layer weight matrices from the occupation and gender checkpoints.


In [23]:
# Show every (variable name, shape) entry stored in the occupation checkpoint
# so the name of the final dense layer can be looked up.
var_list = checkpoint_utils.list_variables(MODEL_DIR_OCCUPATION)
for name_and_shape in var_list:
    print(name_and_shape)


('dense/bias', [1])
('dense/bias/Adam', [1])
('dense/bias/Adam_1', [1])
('dense/kernel', [256, 1])
('dense/kernel/Adam', [256, 1])
('dense/kernel/Adam_1', [256, 1])
('dense_1/bias', [128])
('dense_1/bias/Adam', [128])
('dense_1/bias/Adam_1', [128])
('dense_1/kernel', [256, 128])
('dense_1/kernel/Adam', [256, 128])
('dense_1/kernel/Adam_1', [256, 128])
('dense_2/bias', [33])
('dense_2/bias/Adam', [33])
('dense_2/bias/Adam_1', [33])
('dense_2/kernel', [128, 33])
('dense_2/kernel/Adam', [128, 33])
('dense_2/kernel/Adam_1', [128, 33])
('embeddings', [400002, 100])
('global_step', [])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam', [256])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam_1', [256])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam', [356, 256])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam_1', [356, 256])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam', [512])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam_1', [512])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam', [356, 512])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam_1', [356, 512])
('signal_early_stopping/STOP', [])
('title/beta1_power', [])
('title/beta2_power', [])

In [99]:
# 'dense_2/kernel' is stored as [128, 33]; transpose it so that each row is
# the weight vector of one output unit.
kernel_occupation = checkpoint_utils.load_variable(MODEL_DIR_OCCUPATION, 'dense_2/kernel').T

In [100]:
# Show every (variable name, shape) entry stored in the gender checkpoint;
# here the last dense layer is saved under 'final_layer/...'.
var_list = checkpoint_utils.list_variables(MODEL_DIR_GENDER)
for name_and_shape in var_list:
    print(name_and_shape)


('beta1_power', [])
('beta2_power', [])
('dense/bias', [1])
('dense/kernel', [256, 1])
('dense_1/bias', [128])
('dense_1/kernel', [256, 128])
('embeddings', [400002, 100])
('final_layer/bias', [33])
('final_layer/bias/Adam', [33])
('final_layer/bias/Adam_1', [33])
('final_layer/kernel', [128, 33])
('final_layer/kernel/Adam', [128, 33])
('final_layer/kernel/Adam_1', [128, 33])
('global_step', [])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])
('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])
('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])
('signal_early_stopping/STOP', [])

In [101]:
# 'final_layer/kernel' is stored as [128, 33]; transpose it so that each row
# is the weight vector of one output unit.
kernel_gender = checkpoint_utils.load_variable(MODEL_DIR_GENDER, 'final_layer/kernel').T

Compute cosine similarities between the gender weight vectors and each occupation weight vector.


In [102]:
# Occupation names, in the order used to label the rows of kernel_occupation
# when the per-occupation scores are printed further down.
TITLE_LABELS = [
    'accountant', 'acupuncturist', 'architect', 'attorney',
    'chiropractor', 'comedian', 'composer',
    'dentist', 'dietitian', 'dj',
    'filmmaker',
    'interior_designer',
    'journalist',
    'landscape_architect',
    'magician', 'massage_therapist', 'model',
    'nurse',
    'painter', 'paralegal', 'pastor', 'personal_trainer', 'photographer',
    'physician', 'poet', 'professor', 'psychologist',
    'rapper', 'real_estate_broker',
    'software_engineer', 'surgeon',
    'teacher',
    'yoga_teacher',
]

In [103]:
# Sanity check: after the transpose the matrix is (output units, input
# dimension of the final layer), i.e. one weight vector per row.
kernel_gender.shape


Out[103]:
(33, 128)

In [104]:
# Take rows 0 and 1 of the gender model's final-layer weights as 2-D row
# vectors (normalize expects a 2-D array) and rescale each one with
# sklearn's normalize.
# NOTE(review): that row 0 is "female" and row 1 is "male" is taken from the
# variable names only -- TODO confirm against the gender model's label map.
kernel_gender_female = normalize(kernel_gender[0][np.newaxis, :])
kernel_gender_male = normalize(kernel_gender[1][np.newaxis, :])

In [105]:
# Eyeball the normalized male weight vector (a single-row 2-D array).
kernel_gender_male


Out[105]:
array([[ 0.01618018,  0.1003583 , -0.0723118 ,  0.06453013,  0.22758739,
         0.06790616,  0.08027411,  0.10015733, -0.05590729,  0.023418  ,
         0.06255525, -0.02604564,  0.09049062, -0.01601316,  0.08945937,
        -0.11582728,  0.06244883,  0.07855629,  0.01956639, -0.06774757,
         0.00614625, -0.03594974,  0.0652191 , -0.05078628, -0.00807877,
         0.06896302,  0.11013658, -0.04664179,  0.11593511,  0.17774113,
         0.09496382,  0.12176205,  0.04098931, -0.0970282 ,  0.02898299,
         0.10654851, -0.13562816,  0.03486229,  0.12194955,  0.02276845,
         0.04589143, -0.06606348, -0.00129113, -0.07973252, -0.02630814,
        -0.09769032, -0.1640446 , -0.07602697,  0.00429134,  0.06098389,
         0.02934178, -0.07209212, -0.11304612,  0.29547158, -0.04287611,
        -0.04518875, -0.02993831,  0.06304532,  0.07989506, -0.09601919,
         0.20816126, -0.1977993 ,  0.15119584,  0.01456547,  0.06435941,
        -0.07794361, -0.00554093,  0.05497926,  0.0931736 ,  0.22706528,
        -0.08019326, -0.0819607 ,  0.04490028, -0.01723337,  0.04124108,
         0.13199665, -0.01417105,  0.0725795 , -0.05172402, -0.13563272,
        -0.07302421,  0.24843292,  0.14667384, -0.02692026,  0.15892392,
         0.02655477, -0.00804625,  0.00184608,  0.02203059,  0.00078905,
         0.0115315 ,  0.00199543,  0.05942026,  0.07089076, -0.04697848,
        -0.01500242, -0.02432874, -0.02453819, -0.13443194, -0.00370577,
        -0.03219581, -0.07874984, -0.05446392,  0.05492223, -0.11461313,
        -0.00379655,  0.01339969, -0.01030909,  0.0601744 ,  0.00417376,
        -0.02308951, -0.1329045 , -0.00130105,  0.0959954 ,  0.03397062,
         0.11269465,  0.00561908,  0.00870924,  0.0339431 ,  0.01517005,
        -0.05439634, -0.02544309, -0.13284749,  0.04113958,  0.03033615,
        -0.08890872, -0.09986325, -0.09274729]], dtype=float32)

In [106]:
def _remove_component(vector, unit_axis):
    """Return `vector` minus its projection onto `unit_axis`.

    `unit_axis` is expected to be unit-length; the projection coefficient is
    the plain element-wise dot product of the two arrays.
    """
    return vector - np.sum(np.multiply(vector, unit_axis)) * unit_axis


# Normalized average of the two gender vectors: the part they have in common.
kernel_gender_mean = normalize((kernel_gender_female + kernel_gender_male) / 2)

# What is left of each gender vector once the shared component is removed.
direction_male = _remove_component(kernel_gender_male, kernel_gender_mean)
direction_female = _remove_component(kernel_gender_female, kernel_gender_mean)

In [107]:
# Sanity check: with the shared component removed, the female and male
# residual directions should point in exactly opposite directions (cosine -1).
cosine_similarity(direction_female, direction_male)


Out[107]:
array([[-1.]], dtype=float32)

In [114]:
# Average the occupation weight vectors over all output units (rows).
# Intended use: remove the shared component from a row x via
#   x - np.sum(np.multiply(x, mean)) * mean
# NOTE(review): unlike kernel_gender_mean, this mean is not rescaled to unit
# length, so the formula above only removes the mean component exactly if the
# mean is normalized first -- confirm before relying on it.
kernel_occupation_mean = kernel_occupation.mean(axis=0)

In [119]:
# Signed gender association of every occupation: the cosine similarity between
# the (female - male) weight difference and that occupation's final-layer
# weight vector. Positive scores lean toward the female vector, negative
# scores toward the male vector.
#
# Variants explored earlier and not used here: the sum of absolute cosines to
# each gender vector, and the cosine with direction_male (optionally after
# subtracting the kernel_occupation_mean component from the occupation row).

# Guard against a label list that does not match the weight matrix instead of
# relying on a hard-coded row count.
assert kernel_occupation.shape[0] == len(TITLE_LABELS), (
    'expected one occupation weight row per entry in TITLE_LABELS')

# Loop-invariant: the axis pointing from the male vector to the female vector.
gender_axis = kernel_gender_female - kernel_gender_male

# One batched call scores all occupation rows; the result has shape
# (1, number of occupations), so take its single row.
bias_by_title = cosine_similarity(gender_axis, kernel_occupation)[0]

for title, _bias in zip(TITLE_LABELS, bias_by_title):
    # _bias is a NumPy scalar here, so float() does not go through the
    # deprecated array-to-scalar conversion.
    print('{}:{}'.format(title, float(_bias)))


accountant:-0.166758477688
acupuncturist:0.0150433778763
architect:-0.106728702784
attorney:-0.0355984941125
chiropractor:-0.112065583467
comedian:-0.17996160686
composer:-0.154989466071
dentist:-0.00389941781759
dietitian:0.00302037596703
dj:-0.156128510833
filmmaker:-0.116180986166
interior_designer:-0.00478803366423
journalist:-0.0217301938683
landscape_architect:-0.00763043016195
magician:-0.00733107328415
massage_therapist:-0.0116159021854
model:0.0549785941839
nurse:0.099561393261
painter:0.0174702480435
paralegal:0.0106164813042
pastor:-0.161623597145
personal_trainer:-0.133440434933
photographer:-0.0985902026296
physician:-0.00131351128221
poet:-0.061441861093
professor:0.00782079994678
psychologist:0.00208866596222
rapper:-0.112389668822
real_estate_broker:-0.000683411955833
software_engineer:-0.0237298682332
surgeon:-0.0968104675412
teacher:-0.0625882595778
yoga_teacher:0.0292760580778

In [ ]: